The data are available online, so we download the CSV file directly.
# Read keebler2017.csv straight from the course web server; the first row
# of the file holds the column names.
cookies <- read.csv(
  file = "http://facweb1.redlands.edu/fac/jim_bentley/data/math311/poissoncookies/keebler2017.csv",
  header = TRUE
)
# Student is stored as a factor so it is treated as a categorical label
# rather than as a number.
cookies[["Student"]] <- as.factor(cookies[["Student"]])
# Preview the first rows to confirm the import worked.
head(cookies)
## Chips Bag Student
## 1 8 Bugs 1
## 2 8 Bugs 1
## 3 7 Bugs 1
## 4 10 Bugs 1
## 5 9 Bugs 1
## 6 13 Bugs 1
We compute descriptive statistics. The “by” function makes it easy to compute statistics for subsets of the data.
# Sum of the Chips column over all rows.
sum(cookies[["Chips"]])
## [1] 490
# Number of observations in the Chips column.
length(cookies[["Chips"]])
## [1] 53
# Sample mean of Chips.
mean(cookies[["Chips"]])
## [1] 9.245283
# Sample variance of Chips.
var(cookies[["Chips"]])
## [1] 6.534833
# Per-bag summaries ----
# by() splits Chips on the levels of Bag and applies FUN to each piece.
# The `cookies$Bag` spelling is kept on purpose: by() labels each printed
# group with the deparsed INDICES expression.

# Sum of Chips within each bag.
by(data = cookies$Chips, INDICES = cookies$Bag, FUN = sum)
## cookies$Bag: Bugs
## [1] 223
## --------------------------------------------------------
## cookies$Bag: Mixed
## [1] 36
## --------------------------------------------------------
## cookies$Bag: NoBugs
## [1] 231
# Mean of Chips within each bag.
by(data = cookies$Chips, INDICES = cookies$Bag, FUN = mean)
## cookies$Bag: Bugs
## [1] 9.291667
## --------------------------------------------------------
## cookies$Bag: Mixed
## [1] 9
## --------------------------------------------------------
## cookies$Bag: NoBugs
## [1] 9.24
# Variance of Chips within each bag.
by(data = cookies$Chips, INDICES = cookies$Bag, FUN = var)
## cookies$Bag: Bugs
## [1] 7.780797
## --------------------------------------------------------
## cookies$Bag: Mixed
## [1] 0.6666667
## --------------------------------------------------------
## cookies$Bag: NoBugs
## [1] 6.606667
# One-way frequency table: how many rows show each distinct Chips value.
table(cookies[["Chips"]])
##
## 5 6 7 8 9 10 11 12 13 14 16
## 5 2 6 8 9 9 3 5 3 2 1
# Two-way table of counts: Chips value (rows) by Bag (columns).
table(cookies[["Chips"]], cookies[["Bag"]])
##
## Bugs Mixed NoBugs
## 5 3 0 2
## 6 1 0 1
## 7 3 0 3
## 8 4 1 3
## 9 1 2 6
## 10 4 1 4
## 11 1 0 2
## 12 3 0 2
## 13 3 0 0
## 14 1 0 1
## 16 0 0 1
# Plots ----
# lattice is a recommended package that ships with R, so it is attached with
# library() instead of p_load(), which only works once pacman itself has
# been attached.
library(lattice)
# Count histograms of Chips, one panel per Bag, arranged in a layout of
# 1 column by 3 rows so the panels stack vertically.
histogram(~ Chips | Bag, data = cookies, layout = c(1, 3), type = "count")